{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examine the Evalution Metrics\n",
    "\n",
    "Examine the resulting model evaluation after the pipeline completes. Download the resulting evaluation.json file from S3 and print the report."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from botocore.exceptions import ClientError\n",
    "\n",
    "import os\n",
    "import sagemaker\n",
    "import logging\n",
    "import boto3\n",
    "import sagemaker\n",
    "import pandas as pd\n",
    "\n",
    "sess = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "region = boto3.Session().region_name\n",
    "\n",
    "sm = boto3.Session().client(service_name=\"sagemaker\", region_name=region)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%store -r pipeline_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(pipeline_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%store -r pipeline_experiment_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(pipeline_experiment_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "\n",
    "import time\n",
    "from pprint import pprint\n",
    "\n",
    "executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)[\"PipelineExecutionSummaries\"]\n",
    "pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "print(pipeline_execution_status)\n",
    "\n",
    "while pipeline_execution_status == \"Executing\":\n",
    "    try:\n",
    "        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)[\"PipelineExecutionSummaries\"]\n",
    "        pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "    except Exception as e:\n",
    "        print(\"Please wait...\")\n",
    "        time.sleep(30)\n",
    "\n",
    "pprint(executions_response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# List Pipeline Execution Steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_execution_status = executions_response[0][\"PipelineExecutionStatus\"]\n",
    "print(pipeline_execution_status)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipeline_execution_arn = executions_response[0][\"PipelineExecutionArn\"]\n",
    "print(pipeline_execution_arn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)\n",
    "\n",
    "pprint(steps)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retrieve Evaluation Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for execution_step in reversed(execution.list_steps()):\n",
    "for execution_step in reversed(steps[\"PipelineExecutionSteps\"]):\n",
    "    if execution_step[\"StepName\"] == \"EvaluateModel\":\n",
    "        processing_job_name = execution_step[\"Metadata\"][\"ProcessingJob\"][\"Arn\"].split(\"/\")[-1]\n",
    "\n",
    "describe_evaluation_processing_job_response = sm.describe_processing_job(ProcessingJobName=processing_job_name)\n",
    "\n",
    "evaluation_metrics_s3_uri = describe_evaluation_processing_job_response[\"ProcessingOutputConfig\"][\"Outputs\"][0][\n",
    "    \"S3Output\"\n",
    "][\"S3Uri\"]\n",
    "evaluation_metrics_s3_uri"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pprint import pprint\n",
    "\n",
    "evaluation_json = sagemaker.s3.S3Downloader.read_file(\"{}/evaluation.json\".format(evaluation_metrics_s3_uri))\n",
    "\n",
    "pprint(json.loads(evaluation_json))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download and Analyze the Trained Model from S3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "training_job_arn = None\n",
    "\n",
    "for execution_step in steps[\"PipelineExecutionSteps\"]:\n",
    "    if execution_step[\"StepName\"] == \"Train\":\n",
    "        training_job_arn = execution_step[\"Metadata\"][\"TrainingJob\"][\"Arn\"]\n",
    "\n",
    "        break\n",
    "\n",
    "training_job_name = training_job_arn.split(\"/\")[-1]\n",
    "print(training_job_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_tar_s3_uri = sm.describe_training_job(TrainingJobName=training_job_name)[\"ModelArtifacts\"][\"S3ModelArtifacts\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp $model_tar_s3_uri ./"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir -p ./model\n",
    "!tar -zxvf model.tar.gz -C ./model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "!saved_model_cli show --all --dir ./model/tensorflow/saved_model/0/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!saved_model_cli run --dir ./model/tensorflow/saved_model/0/ --tag_set serve --signature_def serving_default \\\n",
    "    --input_exprs 'input_ids=np.zeros((1,64));input_mask=np.zeros((1,64))'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# List All Artifacts Generated By The Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "processing_job_name = None\n",
    "training_job_name = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "from sagemaker.lineage.visualizer import LineageTableVisualizer\n",
    "\n",
    "viz = LineageTableVisualizer(sagemaker.session.Session())\n",
    "\n",
    "for execution_step in reversed(steps[\"PipelineExecutionSteps\"]):\n",
    "    print(execution_step)\n",
    "    # We are doing this because there appears to be a bug of this LineageTableVisualizer handling the Processing Step\n",
    "    if execution_step[\"StepName\"] == \"Processing\":\n",
    "        processing_job_name = execution_step[\"Metadata\"][\"ProcessingJob\"][\"Arn\"].split(\"/\")[-1]\n",
    "        print(processing_job_name)\n",
    "        display(viz.show(processing_job_name=processing_job_name))\n",
    "    elif execution_step[\"StepName\"] == \"Train\":\n",
    "        training_job_name = execution_step[\"Metadata\"][\"TrainingJob\"][\"Arn\"].split(\"/\")[-1]\n",
    "        print(training_job_name)\n",
    "        display(viz.show(training_job_name=training_job_name))\n",
    "    else:\n",
    "        display(viz.show(pipeline_execution_step=execution_step))\n",
    "        time.sleep(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analyze Experiment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker.analytics import ExperimentAnalytics\n",
    "\n",
    "time.sleep(30)  # avoid throttling exception\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option(\"max_colwidth\", 500)\n",
    "\n",
    "experiment_analytics = ExperimentAnalytics(\n",
    "    experiment_name=pipeline_experiment_name,\n",
    ")\n",
    "\n",
    "experiment_analytics.dataframe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Release Resources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%html\n",
    "\n",
    "<p><b>Shutting down your kernel for this notebook to release resources.</b></p>\n",
    "<button class=\"sm-command-button\" data-commandlinker-command=\"kernelmenu:shutdown\" style=\"display:none;\">Shutdown Kernel</button>\n",
    "\n",
    "<script>\n",
    "try {\n",
    "    els = document.getElementsByClassName(\"sm-command-button\");\n",
    "    els[0].click();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}    \n",
    "</script>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%javascript\n",
    "\n",
    "try {\n",
    "    Jupyter.notebook.save_checkpoint();\n",
    "    Jupyter.notebook.session.delete();\n",
    "}\n",
    "catch(err) {\n",
    "    // NoOp\n",
    "}"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
